import re

import requests

# Absolute URLs of the detail pages found in the home page's hot list.
# Filled in by main().
child_href_list = []
domain = "https://dytt89.com"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0'
}
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Matches the hot-list <ul> block on the home page.
obj1 = re.compile(r"2024必看热片.*?<ul>(?P<ul>.*?)</ul>", re.S)

# Matches the href of every entry inside that list.
obj2 = re.compile(r"<a href='(?P<href>.*?)'", re.S)

# Matches the movie title and the download link inside a detail page.
obj3 = re.compile(r'◎片　　名　(?P<movie>.*?)<br />.*?'
                  r'<td style="WORD-WRAP: break-word" bgcolor="#fdfddf"><a href="(?P<dowload_href>.*?)">magnet:\?xt=urn', re.S)


def fetch_text(url):
    """Download *url* and return its body decoded as gb2312.

    The response is used as a context manager so the connection is released
    even when decoding or the status check raises.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status.
    """
    with requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT) as resp:
        resp.raise_for_status()
        resp.encoding = 'gb2312'
        return resp.text


def extract_child_urls(html):
    """Return the absolute URL of every entry in the hot list found in *html*.

    Returns an empty list when the hot-list section is not present.
    """
    return [
        domain + link.group("href")
        for section in obj1.finditer(html)
        for link in obj2.finditer(section.group("ul"))
    ]


def extract_download_href(html):
    """Return the download link found in a detail page, or None if absent."""
    found = obj3.search(html)
    if found is None:
        return None
    return found.group("dowload_href")


def main():
    """Print the download link of the first entry in the home page's hot list."""
    child_href_list.extend(extract_child_urls(fetch_text(domain)))

    for href in child_href_list:
        download_href = extract_download_href(fetch_text(href))
        if download_href is None:
            # Layout differs from what obj3 expects; fail with a readable
            # message instead of an AttributeError on None.
            raise SystemExit(f"no download link found on {href}")
        print(download_href)
        # Only the first entry of the list is processed.
        break


if __name__ == "__main__":
    main()


